####################################################
# Name: R Replication Code for Figures in 
#	Who Punishes Extremist Nominees?
# Author: Dan Thompson and Andy Hall
# Date: December 2017
# Notes: Run the STATA code first
####################################################

setwd("~/Dropbox/BaseTurnout-AM/hall_thompson_replication_file")
library(tidyverse)
library(haven)


##
# We start by building a function that puts together multiple plots.
# This code was taken directly from here: 
# http://www.cookbook-r.com/Graphs/Multiple_graphs_on_one_page_(ggplot2)/
##

# Define multiplot function someone suggested online
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
	library(grid)

	# Make a list from the ... arguments and plotlist
	plots = c(list(...), plotlist)
	numPlots = length(plots)

	# If layout is NULL, then use 'cols' to determine layout
	if (is.null(layout)) {
		# Make the panel
	# ncol: Number of columns of plots
	# nrow: Number of rows needed, calculated from # of cols
	layout = matrix(seq(1, cols * ceiling(numPlots/cols)),
		ncol=cols, nrow=ceiling(numPlots/cols))
	}

 if (numPlots==1) {
 		print(plots[[1]])
 	} else {

    # Set up the page
	grid.newpage()
	pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))

    # Make each plot, in the correct location
    for (i in 1:numPlots) {

		# Get the i,j matrix positions of the regions that contain this subplot
		matchidx = as.data.frame(which(layout==i, arr.ind=TRUE))
		print(plots[[i]], vp=viewport(layout.pos.row=matchidx$row,
			layout.pos.col=matchidx$col))
    }
  }
}


##
# Figure 2
##

# Set the bin size
bin_size_rd = 10

# Bring in the data
rd_main = read_dta("rd_master_hs.dta") %>%
	select(treat, turnout_party_share, rv) %>%
	filter(!is.na(turnout_party_share)) %>%

	# Create the bins
	group_by(treat) %>%
	mutate(
		sort_rv = abs(rv),
		bin = ceiling(rank(sort_rv)/bin_size_rd)
	) %>%
	select(-sort_rv) %>%
	ungroup() %>%

	# Estimate means of the running variable and outcome
	# within each bin 
	group_by(treat, bin) %>%
	mutate(
		bin_turnout = mean(turnout_party_share),
		bin_rv = mean(rv)
	) %>%

	# Run the plot
	ggplot() +
		geom_vline(xintercept=0, col="red", alpha=0.3, size=1.5) +
		geom_point(aes(x=bin_rv, y=bin_turnout), col="gray40", 
			size=3, alpha=0.3) +
		stat_smooth(aes(x=rv, y=turnout_party_share, group=treat), 
			method="lm", fill="royalblue") +
		xlab("Extremist Win Margin in Primary") +
		ylab("Party's Share of General-Election Turnout") +
		xlim(-.2, .2) + coord_cartesian(ylim=c(.4, .6)) +
		annotate("text", x=0.135, y=0.59, label="N = 171", size=5) +
		annotate("text", x=0.135, y=0.57, label="Bin Size = 10", size=5) + 
		theme(axis.text=element_text(size=12),
        	axis.title=element_text(size=14))

# Print out the plot
rd_main


##
# Figure 3
##

# Set up a dataframe of the effects
rd_comparison = data.frame(
		model = rep(c("Local Linear", "3rd Order", "5th Order", "CCT"), 2),
		Outcome = c(rep("vote", 4), rep("turnout", 4)),
		estimate = c(-.13, -.07, -.10, -.15, -.11, -.08, -.10, -.11),
		se = c(.04, .03, .04, .04, .04, .03, .03, .04)
	) %>%
	
	# Define the confidence interval
	mutate(
		lower = estimate - se*2,
		upper = estimate + se*2,
	) %>%

	# Run the plot
	ggplot(aes(x=model, group=Outcome, col=Outcome)) +
	      geom_point(aes(y=estimate, shape=Outcome), 
	      	size=2, position=position_dodge(width=.5)) +
	      geom_errorbar(aes(ymin=lower, ymax=upper), 
	      	width=.01, position=position_dodge(width=.5)) +
		ylim(-.25, .05) +
	    xlab("") +
	    ylab("Estimated Effect") +
	    theme(axis.text.x=element_text(size=12))

# Print out the plot
rd_comparison


##
# Figure 4
##

# Build the plot using the output from Stata
turnout_diff = read_dta("turnout_rate_diff_fig.dta") %>%
	ggplot(aes(x=bw)) +
		geom_line(aes(y=0), col="red", linetype=2, alpha=0.5) +
		geom_line(aes(y=diff_in_y), col="dodgerblue", size=1) +
		geom_ribbon(aes(ymin=lower, ymax=upper), alpha=0.3, fill="dodgerblue") +
		xlab("RD Bandwidth") + 
		ylab("Opposing-Party Minus Own-Party Turnout Rate")

# Print out the plot
turnout_diff


##
# Figure A1
##

# Define a function for the bandwidth plot
bwPlot <- function(model_name, xlabel=TRUE, ylabel=TRUE){
	xtitle = ifelse(xlabel, "RD Bandwidth", "")
	ytitle = ifelse(ylabel, "Effect on Turnout Share", "")
	fig = read_dta("spec_for_r_hs.dta") %>%
		filter(model==model_name) %>%
		ggplot(aes(x=bw)) +
			geom_line(aes(y=0), col="red", linetype=2, alpha=0.3) +
			geom_line(aes(y=est), col="dodgerblue") +
			geom_ribbon(aes(ymin=lower, ymax=upper), alpha=0.3, fill="dodgerblue") +
			xlab(xtitle) + ylab(ytitle) +
			labs(title=model_name) +
			theme(plot.title = element_text(hjust = 0.5)) + 
			ylim(-.3, .1)
	return(fig)
}

# Run the bandwidth plot for each model
bw_local_linear = bwPlot("Local Linear", xlabel=F)
bw_3rd = bwPlot("3rd Order", xlabel=F, ylabel=F)
bw_5th = bwPlot("5th Order")
bw_cct = bwPlot("CCT", ylabel=F)

# Put all of the plots together and save them as a pdf
multiplot(bw_local_linear, bw_5th, bw_3rd, bw_cct, cols=2)

